library(ggplot2)
library(vegan)
## 载入需要的程辑包:permute
## 载入需要的程辑包:lattice
## This is vegan 2.6-2
library(colorRamps)
library(psych)
## 
## 载入程辑包:'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(tidyr)
library(readxl)
library(ape)
library(ggplot2)
library(colorRamps)
library(ggrepel)

rm(list = ls())
load("data/CForBio.data.prep.2022.11.22.RData")

library(vegan)
cutoff <- 1000

sort(colSums(bac.amp1))
##  BD1623  DL1904    NG14  XS0601 BTM1515  GT0910     NG7  BD1512  TT1419  CB2424 
##   24903   26972   27053   27234   29143   31098   33452   34090   34339   34770 
##  GT2515  TT0302 DHS0824  HS1433  HS1531  TT1201  HS2325 BTM2317  GT0206 DHS0911 
##   34826   35210   35306   35369   35611   35751   36395   36829   36923   37867 
##    LS11 DHS0808 BTM1323     NG8  CB2009  XS0809  DL0810     LS9  DL0906  XS2022 
##   39954   41573   42171   42958   43395   45875   45977   46174   46676   47364 
##     GH1     GH4  BD1203    LS15  CB0122     GH2 
##   48535   54597   55685   60392   65051   65829
sort(colSums(bac.mgm1))
##   HS1433   BD1623      NG7   XS2022  DHS0824  DHS0808   DL1904  BTM1323 
##  9115574  9597402  9667544  9737388  9875341 10013817 10016208 10110020 
##   TT1419   HS2325   GT0910   DL0810   GT2515   GT0206   TT0302      NG8 
## 10160188 10164701 10195067 10301253 10395324 10534807 10643744 10648853 
##   XS0809   HS1531     LS11   CB2424  BTM2317   XS0601      GH1   DL0906 
## 10675545 10684100 10970718 11115704 11143274 11320447 11461977 11544870 
##   BD1512      GH2     NG14  DHS0911   CB0122   CB2009   BD1203   TT1201 
## 11738899 11750163 11847992 11987853 12237915 12559810 12632900 12911304 
##      LS9     LS15      GH4  BTM1515 
## 13903753 14521553 14922282 15939544
sort(colSums(ko.mgm1))
##   HS1433   HS1531   BD1512   HS2325  DHS0911   BD1203   CB0122   XS2022 
## 516612.0 554923.2 561838.9 564412.5 570647.8 572166.7 578939.1 581887.7 
##   BD1623   TT1419      GH4   XS0809   CB2009   TT0302  DHS0824   GT0206 
## 585609.5 588950.4 590928.8 591823.2 595951.2 597472.6 597523.9 605316.2 
##   GT0910  DHS0808   XS0601   TT1201      LS9     LS15      NG8   GT2515 
## 607017.0 611526.6 612949.0 618809.8 618974.8 621618.7 623901.4 624726.0 
##     NG14      NG7  BTM1323  BTM1515     LS11  BTM2317   CB2424      GH2 
## 626956.5 630290.3 640564.2 640670.3 653233.2 657988.6 661383.3 664885.4 
##      GH1   DL0906   DL0810   DL1904 
## 670864.8 682216.2 709302.7 712869.3
sort(colSums(cog.mgm1))
##   HS1433   BD1512   CB0122  DHS0911   HS1531   BD1623   HS2325      GH4 
## 665137.6 700954.6 701771.9 706113.6 707706.2 717680.3 719426.8 719907.9 
##   BD1203   TT1419   XS2022   CB2009   XS0809      NG8   TT0302      LS9 
## 720810.2 735415.6 736115.9 736728.3 737294.4 738141.8 741127.0 741129.8 
##  DHS0824     NG14   GT0206      NG7     LS15   GT0910  DHS0808   XS0601 
## 742211.5 745558.0 745871.5 746178.6 747159.8 748678.5 749599.8 753834.3 
##  BTM2317  BTM1323  BTM1515   TT1201     LS11   GT2515   CB2424   DL0906 
## 761668.5 762288.9 766129.1 769869.2 772483.2 780307.3 785374.0 785610.6 
##      GH2      GH1   DL1904   DL0810 
## 806012.5 810165.7 822934.2 825360.4
sort(colSums(rgi.mgm1))
##      GH4   CB0122   CB2009      NG8   BD1623      NG7     NG14   DL0906 
## 20995.21 23022.58 24293.34 24840.47 25081.28 25099.30 25294.91 25324.26 
##   BD1512     LS15   HS1433      LS9   TT1419     LS11   BD1203  BTM2317 
## 25426.07 25842.94 25856.23 25965.01 26532.75 26701.09 26808.88 27092.64 
##  DHS0911   TT0302   XS0601   XS0809   CB2424   XS2022   GT0206  BTM1515 
## 27105.60 27138.77 27223.78 27593.08 27629.52 27753.26 27775.72 27778.20 
##   GT0910   TT1201      GH2   DL0810   DL1904   HS2325  DHS0824  DHS0808 
## 28043.34 28054.02 28314.02 28403.45 28474.85 28496.89 28701.60 28718.88 
##      GH1   HS1531  BTM1323   GT2515 
## 28730.37 29012.93 29564.26 30208.16
sort(colSums(cazy.mgm1))
##   BD1623      GH4   BD1512      NG8      NG7     NG14   CB0122   XS0601 
## 13535.55 13791.28 14072.59 14476.19 14543.41 14716.72 14718.56 14858.62 
##   CB2009   BD1203   GT0910   GT0206   DL0906   CB2424      LS9     LS15 
## 15158.24 15372.90 15421.68 15423.15 15490.17 15512.52 15621.44 15661.32 
##     LS11  BTM2317   XS0809   HS1433   HS2325   XS2022   DL1904   DL0810 
## 15742.96 15933.04 16022.71 16071.16 16630.05 16715.92 16996.28 17006.50 
##      GH2   GT2515  DHS0911      GH1   HS1531  BTM1515   TT1419  DHS0824 
## 17012.91 17096.55 17106.44 17144.00 17227.44 17361.87 17600.44 17729.42 
##  BTM1323   TT0302   TT1201  DHS0808 
## 17929.15 18253.39 18461.59 18792.74
bac.amp <- rrarefy(t(bac.amp1),sample = 24903 )
bac.mgm <- data.frame(t(bac.mgm1))

ko.mgm <- data.frame(t(ko.mgm1))
cog.mgm <- data.frame(t(cog.mgm1))
rgi.mgm <- data.frame(t(rgi.mgm1))
cazy.mgm <- data.frame(t(cazy.mgm1))

env.amp1$rgi.mgm.H <- vegan::diversity(rgi.mgm)
env.amp1$cazy.mgm.H <- vegan::diversity(cazy.mgm)
env.amp1$ko.mgm.H <- vegan::diversity(ko.mgm)
env.amp1$cog.mgm.H <- vegan::diversity(cog.mgm)
env.amp1$bac.mgm.H <- vegan::diversity(bac.mgm)
env.amp1$bac.amp.H <- vegan::diversity(bac.amp)

env.amp1$rgi.mgm.S <- vegan::specnumber(rgi.mgm)
env.amp1$cazy.mgm.S <- vegan::specnumber(cazy.mgm)
env.amp1$ko.mgm.S <- vegan::specnumber(ko.mgm)
env.amp1$cog.mgm.S <- vegan::specnumber(cog.mgm)
env.amp1$bac.mgm.S <- vegan::specnumber(bac.mgm)
env.amp1$bac.amp.S <- vegan::specnumber(bac.amp)

env.amp1$cog.mgm.sum <- rowSums(cog.mgm)
env.amp1$rgi.mgm.sum <- rowSums(rgi.mgm)
env.amp1$cazy.mgm.sum <- rowSums(cazy.mgm)
env.amp1$ko.mgm.sum <- rowSums(ko.mgm)
env.amp1$bac.mgm.sum <- rowSums(bac.mgm)

#genomeTraits
genomeTrait<-read_excel("data/genomeTrait/genomeTraits_contigs500.xlsx",sheet = "Sheet1")

genomeTrait$sample_name == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.genomeTraits<-cbind(env.amp1,genomeTrait[,c(3:18)])
#Supplementary Fig. 1
set.seed(315)
bac.amp.pc<-pcoa(vegdist(decostand(bac.amp,"hellinger"),"bray"))
env.amp.pc<-data.frame(bac.amp.pc$vectors[,c("Axis.1", "Axis.2")], env.amp1)
bac.amp.pc1=round(bac.amp.pc$values$Relative_eig[1], 3)*100
bac.amp.pc2=round(bac.amp.pc$values$Relative_eig[2], 3)*100
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")]; 
colnames(env.sub) <-  c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")  
ef<-envfit(bac.amp.pc$vectors, env.sub, na.rm = TRUE); ef
## 
## ***VECTORS
## 
##                     Axis.1   Axis.2     r2 Pr(>r)    
## Latitude          -0.58162  0.81346 0.7411  0.001 ***
## Longitude         -0.39227  0.91985 0.3107  0.003 ** 
## Altitude          -0.72091  0.69303 0.2809  0.005 ** 
## MAP                0.84119 -0.54073 0.7209  0.001 ***
## MAT                0.57514 -0.81805 0.6895  0.001 ***
## TC                -0.40909  0.91249 0.3319  0.003 ** 
## TN                -0.91344  0.40698 0.2067  0.028 *  
## TP                -0.99964  0.02686 0.4952  0.001 ***
## pH                -0.63857 -0.76956 0.7395  0.001 ***
## ACa               -0.91707 -0.39872 0.6518  0.001 ***
## AMg               -0.61705 -0.78692 0.6484  0.001 ***
## AFe               -0.28103 -0.95970 0.4618  0.001 ***
## AK                 0.30253  0.95314 0.0971  0.195    
## C_N                0.04331  0.99906 0.4170  0.001 ***
## C_P                0.89631  0.44342 0.5332  0.001 ***
## N_P                0.97768  0.21009 0.5716  0.001 ***
## Soil bulk density  0.51270  0.85857 0.3073  0.003 ** 
## Plant abundance    0.67471 -0.73809 0.1116  0.123    
## Plant basal area   0.11058  0.99387 0.1346  0.105    
## Plant richness     0.75771 -0.65259 0.5813  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
bac.amp.ef.arrows<-data.frame(ef$vectors$arrows)
bac.amp.ef.arrows$Variable <- row.names(bac.amp.ef.arrows)
m1<-max(abs(env.amp.pc$Axis.1)); m2<-max(abs(env.amp.pc$Axis.2))

gp1<-ggplot(env.amp.pc) +
  geom_point(mapping = aes(x = Axis.1, y = Axis.2, colour = site.latitude),size=3, alpha = 0.9) +
  labs(x = sprintf("PCo1 (%.1f%%)", bac.amp.pc1), y = sprintf("PCo2 (%.1f%%)", bac.amp.pc2))+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  ggtitle("bac.amp")+
  geom_segment(data = bac.amp.ef.arrows, aes(x = 0, xend = Axis.1 * m1, y = 0, yend = Axis.2 * m2), arrow = arrow(length = unit(0.1, "cm")), colour = "grey") +
  geom_text_repel(data = bac.amp.ef.arrows, aes(x = Axis.1 * m1, y = Axis.2 * m2, label = Variable),size = 3,colour="black")+theme(axis.title = element_text(color="black",face = "bold"),axis.text = element_text(color="black",face="bold"))+theme_bw()

gp1

#ggsave("pdf2/Fig.S5a_2.PcoA.bact.composition.2023.01.05.pdf", width = 6, height = 4)

set.seed(315)
bac.mgm.pc<-pcoa(vegdist(decostand(bac.mgm,"hellinger"),"bray"))
env.amp.pc<-data.frame(bac.mgm.pc$vectors[,c("Axis.1", "Axis.2")], env.amp1)
bac.mgm.pc1=round(bac.mgm.pc$values$Relative_eig[1], 3)*100
bac.mgm.pc2=round(bac.mgm.pc$values$Relative_eig[2], 3)*100
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")]; 
colnames(env.sub) <-  c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")  
ef<-envfit(bac.mgm.pc$vectors, env.sub, na.rm = TRUE); ef
## 
## ***VECTORS
## 
##                     Axis.1   Axis.2     r2 Pr(>r)    
## Latitude           0.77294 -0.63448 0.2325  0.016 *  
## Longitude          0.90654  0.42211 0.0220  0.695    
## Altitude           0.25494 -0.96696 0.2096  0.023 *  
## MAP               -0.89232  0.45141 0.5233  0.001 ***
## MAT               -0.75999  0.64993 0.2027  0.027 *  
## TC                 0.82505 -0.56507 0.0352  0.536    
## TN                 0.99229  0.12393 0.1485  0.080 .  
## TP                 0.99299 -0.11818 0.5525  0.001 ***
## pH                 0.99538  0.09598 0.8623  0.001 ***
## ACa                0.99323  0.11613 0.8357  0.001 ***
## AMg                0.99165  0.12894 0.7697  0.001 ***
## AFe                0.96214  0.27254 0.4073  0.001 ***
## AK                -0.99684  0.07940 0.1232  0.118    
## C_N               -0.83275 -0.55365 0.1489  0.084 .  
## C_P               -0.98222  0.18771 0.5696  0.001 ***
## N_P               -0.96809  0.25059 0.5316  0.001 ***
## Soil bulk density -0.85769  0.51417 0.2538  0.011 *  
## Plant abundance   -0.75417  0.65667 0.0951  0.182    
## Plant basal area  -0.42044 -0.90732 0.2931  0.008 ** 
## Plant richness    -0.98393  0.17853 0.3281  0.002 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
bac.mgm.ef.arrows<-data.frame(ef$vectors$arrows)
bac.mgm.ef.arrows$Variable <- row.names(bac.mgm.ef.arrows)
m1<-max(abs(env.amp.pc$Axis.1)); m2<-max(abs(env.amp.pc$Axis.2))

gp2<-ggplot(env.amp.pc) +
  geom_jitter(mapping = aes(x = Axis.1, y = Axis.2, colour = site.latitude),size=3, alpha = 0.9) +
  labs(x = sprintf("PCo1 (%.1f%%)", bac.mgm.pc1), y = sprintf("PCo2 (%.1f%%)", bac.mgm.pc2))+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  ggtitle("bac.mgm")+
  geom_segment(data = bac.mgm.ef.arrows, aes(x = 0, xend = Axis.1 * m1, y = 0, yend = Axis.2 * m2), arrow = arrow(length = unit(0.1, "cm")), colour = "grey") +
  geom_text_repel(data = bac.mgm.ef.arrows, aes(x = Axis.1 * m1, y = Axis.2 * m2, label = Variable),size = 3)+theme(axis.title = element_text(color="black",face = "bold"),axis.text = element_text(color="black",face="bold"))+theme_bw()

gp2

#ggsave("pdf2/Fig.S5b.PcoA.bact.composition.2022.12.29.pdf", width = 6, height = 4)

set.seed(315)
ko.mgm.pc<-pcoa(vegdist(decostand(ko.mgm,"hellinger"),"bray"))
env.amp.pc<-data.frame(ko.mgm.pc$vectors[,c("Axis.1", "Axis.2")], env.amp1)
ko.mgm.pc1=round(ko.mgm.pc$values$Relative_eig[1], 3)*100
ko.mgm.pc2=round(ko.mgm.pc$values$Relative_eig[2], 3)*100
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")]; 
colnames(env.sub) <-  c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")  
ef<-envfit(ko.mgm.pc$vectors, env.sub, na.rm = TRUE); ef
## 
## ***VECTORS
## 
##                     Axis.1   Axis.2     r2 Pr(>r)    
## Latitude           0.33207 -0.94325 0.8185  0.001 ***
## Longitude          0.20850 -0.97802 0.3417  0.001 ***
## Altitude           0.21586 -0.97642 0.2179  0.021 *  
## MAP               -0.61804  0.78614 0.7433  0.001 ***
## MAT               -0.30433  0.95257 0.7992  0.001 ***
## TC                 0.27507 -0.96143 0.3210  0.002 ** 
## TN                 0.80660 -0.59110 0.2397  0.010 ** 
## TP                 0.87791 -0.47883 0.6641  0.001 ***
## pH                 0.99839  0.05665 0.8542  0.001 ***
## ACa                0.94745 -0.31991 0.8691  0.001 ***
## AMg                0.97584  0.21849 0.7660  0.001 ***
## AFe                0.68100  0.73228 0.5953  0.001 ***
## AK                -0.53732 -0.84338 0.2051  0.025 *  
## C_N               -0.25060 -0.96809 0.5882  0.001 ***
## C_P               -0.99973  0.02317 0.5825  0.001 ***
## N_P               -0.93871  0.34472 0.5918  0.001 ***
## Soil bulk density -0.99807 -0.06204 0.2029  0.023 *  
## Plant abundance   -0.82764  0.56127 0.0713  0.276    
## Plant basal area  -0.49375 -0.86960 0.2786  0.009 ** 
## Plant richness    -0.51766  0.85559 0.6935  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
ko.mgm.ef.arrows<-data.frame(ef$vectors$arrows)
ko.mgm.ef.arrows$Variable <- row.names(ko.mgm.ef.arrows)
m1<-max(abs(env.amp.pc$Axis.1)); m2<-max(abs(env.amp.pc$Axis.2))

gp3<-ggplot(env.amp.pc) +
  geom_point(mapping = aes(x = Axis.1, y = Axis.2, colour = site.latitude),size=3, alpha = 0.9) +
  labs(x = sprintf("PCo1 (%.1f%%)", ko.mgm.pc1), y = sprintf("PCo2 (%.1f%%)", ko.mgm.pc2))+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  ggtitle("ko.mgm")+
  geom_segment(data = ko.mgm.ef.arrows, aes(x = 0, xend = Axis.1 * m1, y = 0, yend = Axis.2 * m2), arrow = arrow(length = unit(0.1, "cm")), colour = "grey") +
  geom_text_repel(data = ko.mgm.ef.arrows, aes(x = Axis.1 * m1, y = Axis.2 * m2, label = Variable),size = 3)+theme(axis.title = element_text(color="black",face = "bold"),axis.text = element_text(color="black",face="bold"))+theme_bw()

gp3

#ggsave("pdf2/Fig.S5c.PcoA.KO.composition.2023.01.05.pdf", width = 6, height = 4)

set.seed(315)
rgi.mgm.pc<-pcoa(vegdist(decostand(rgi.mgm,"hellinger"),"bray"))
env.amp.pc<-data.frame(rgi.mgm.pc$vectors[,c("Axis.1", "Axis.2")], env.amp1)
rgi.mgm.pc1=round(rgi.mgm.pc$values$Relative_eig[1], 3)*100
rgi.mgm.pc2=round(rgi.mgm.pc$values$Relative_eig[2], 3)*100
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")]; 
colnames(env.sub) <-  c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")  
ef<-envfit(rgi.mgm.pc$vectors, env.sub, na.rm = TRUE); ef
## 
## ***VECTORS
## 
##                     Axis.1   Axis.2     r2 Pr(>r)    
## Latitude           0.42041 -0.90733 0.6997  0.001 ***
## Longitude          0.36617 -0.93055 0.2295  0.014 *  
## Altitude           0.16282 -0.98666 0.3524  0.003 ** 
## MAP               -0.66560  0.74631 0.7434  0.001 ***
## MAT               -0.39123  0.92029 0.6587  0.001 ***
## TC                 0.48621 -0.87384 0.2164  0.014 *  
## TN                 0.95321 -0.30231 0.2706  0.006 ** 
## TP                 0.93787 -0.34698 0.7011  0.001 ***
## pH                 0.99811 -0.06138 0.7752  0.001 ***
## ACa                0.93096 -0.36513 0.8717  0.001 ***
## AMg                0.99595  0.08995 0.7199  0.001 ***
## AFe                0.73353  0.67966 0.5364  0.001 ***
## AK                -0.59416 -0.80435 0.1883  0.033 *  
## C_N               -0.31710 -0.94839 0.3682  0.001 ***
## C_P               -0.99648  0.08382 0.6073  0.001 ***
## N_P               -0.94882  0.31582 0.6166  0.001 ***
## Soil bulk density -0.95857  0.28486 0.1732  0.045 *  
## Plant abundance   -0.72661  0.68705 0.0952  0.182    
## Plant basal area  -0.52424 -0.85157 0.2682  0.010 ** 
## Plant richness    -0.60895  0.79321 0.6717  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
rgi.mgm.ef.arrows<-data.frame(ef$vectors$arrows)
rgi.mgm.ef.arrows$Variable <- row.names(rgi.mgm.ef.arrows)
m1<-max(abs(env.amp.pc$Axis.1)); m2<-max(abs(env.amp.pc$Axis.2))

gp4<-ggplot(env.amp.pc) +
  geom_point(mapping = aes(x = Axis.1, y = Axis.2, colour = site.latitude),size=3, alpha = 0.9) +
  labs(x = sprintf("PCo1 (%.1f%%)", rgi.mgm.pc1), y = sprintf("PCo2 (%.1f%%)", rgi.mgm.pc2))+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  ggtitle("arg.mgm")+
  geom_segment(data = rgi.mgm.ef.arrows, aes(x = 0, xend = Axis.1 * m1, y = 0, yend = Axis.2 * m2), arrow = arrow(length = unit(0.1, "cm")), colour = "grey") +
  geom_text_repel(data = rgi.mgm.ef.arrows, aes(x = Axis.1 * m1, y = Axis.2 * m2, label = Variable),size = 3)+theme(axis.title = element_text(color="black",face = "bold"),axis.text = element_text(color="black",face="bold"))+theme_bw()

gp4

#ggsave("pdf2/Fig.S5d.PcoA.ARG.composition.2023.01.05.pdf", width = 6, height = 4)

set.seed(315)
cazy.mgm.pc<-pcoa(vegdist(decostand(cazy.mgm,"hellinger"),"bray"))
env.amp.pc<-data.frame(cazy.mgm.pc$vectors[,c("Axis.1", "Axis.2")], env.amp1)
cazy.mgm.pc1=round(cazy.mgm.pc$values$Relative_eig[1], 3)*100
cazy.mgm.pc2=round(cazy.mgm.pc$values$Relative_eig[2], 3)*100
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")]; 
colnames(env.sub) <-  c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")  
ef<-envfit(cazy.mgm.pc$vectors, env.sub, na.rm = TRUE); ef
## 
## ***VECTORS
## 
##                     Axis.1   Axis.2     r2 Pr(>r)    
## Latitude           0.56039 -0.82823 0.4734  0.001 ***
## Longitude          0.50368 -0.86389 0.1369  0.083 .  
## Altitude           0.22787 -0.97369 0.3392  0.003 ** 
## MAP               -0.89544  0.44519 0.5415  0.001 ***
## MAT               -0.53433  0.84528 0.4425  0.001 ***
## TC                 0.46764 -0.88392 0.2316  0.010 ** 
## TN                 0.84644 -0.53249 0.3241  0.003 ** 
## TP                 0.97370 -0.22783 0.6940  0.001 ***
## pH                 0.88977  0.45641 0.8580  0.001 ***
## ACa                0.95824  0.28597 0.8356  0.001 ***
## AMg                0.87404  0.48586 0.7377  0.001 ***
## AFe                0.74281  0.66950 0.5055  0.001 ***
## AK                -0.57390 -0.81892 0.1541  0.058 .  
## C_N               -0.31248 -0.94993 0.1956  0.020 *  
## C_P               -0.97849  0.20628 0.5798  0.001 ***
## N_P               -0.91898  0.39431 0.5993  0.001 ***
## Soil bulk density -0.98394  0.17852 0.2034  0.034 *  
## Plant abundance   -0.81619  0.57778 0.0740  0.281    
## Plant basal area  -0.26681 -0.96375 0.2845  0.008 ** 
## Plant richness    -0.83938  0.54355 0.4920  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
cazy.mgm.ef.arrows<-data.frame(ef$vectors$arrows)
cazy.mgm.ef.arrows$Variable <- row.names(cazy.mgm.ef.arrows)
m1<-max(abs(env.amp.pc$Axis.1)); m2<-max(abs(env.amp.pc$Axis.2))

gp5<-ggplot(env.amp.pc) +
  geom_point(mapping = aes(x = Axis.1, y = Axis.2 * -1, colour = site.latitude),size=3, alpha = 0.9) +
  labs(x = sprintf("PCo1 (%.1f%%)", cazy.mgm.pc1), y = sprintf("PCo2 (%.1f%%)", cazy.mgm.pc2))+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  ggtitle("cazy.mgm")+
  geom_segment(data = cazy.mgm.ef.arrows, aes(x = 0, xend = Axis.1 * m1, y = 0, yend = Axis.2 * m2), arrow = arrow(length = unit(0.1, "cm")), colour = "grey") +
  geom_text_repel(data = cazy.mgm.ef.arrows, aes(x = Axis.1 * m1, y = Axis.2 * m2, label = Variable),size = 3)+theme(axis.title = element_text(color="black",face = "bold"),axis.text = element_text(color="black",face="bold"))+theme_bw()
gp5

#ggsave("pdf2/Fig.S5e.PcoA.CAZy.composition.2023.01.05.pdf", width = 6, height = 4)
#Supplementary Fig. 2
p1<-ggplot() + geom_jitter(data=env.amp1,  height = 0, aes(x= pH, y=bac.amp.H, color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.amp1,aes(x= pH, y=bac.amp.H), method ="lm", col= "white")+
  labs(x = "Soil pH",y = "H'.16S")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6.5,y=6.3, label="R2 = 0.481\nP = 1.653e-06",
           size=5,color="black")
p1

#ggsave("pdf/Fig.1a.corr.pH.H16S.2023.04.18.pdf", width = 4, height = 3)
summary(lm(env.amp1$bac.amp.H~env.amp1$pH))
## 
## Call:
## lm(formula = env.amp1$bac.amp.H ~ env.amp1$pH)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.40585 -0.16282 -0.04642  0.14106  0.50752 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.56163    0.18483  30.090  < 2e-16 ***
## env.amp1$pH  0.21297    0.03683   5.782 1.65e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2254 on 34 degrees of freedom
## Multiple R-squared:  0.4958, Adjusted R-squared:  0.481 
## F-statistic: 33.43 on 1 and 34 DF,  p-value: 1.653e-06
shapiro.test(residuals(lm(env.amp1$bac.amp.H~env.amp1$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.amp1$bac.amp.H ~ env.amp1$pH))
## W = 0.97818, p-value = 0.6835
p3<-ggplot() + geom_jitter(data=env.amp1,  height = 0, aes(x= pH, y=ko.mgm.H, color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.amp1,aes(x= pH, y=ko.mgm.H), col= "white")+
  labs(x = "Soil pH",y = "H'.KO")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6.5,y=7.9, label="R2 = 0.199\nP =  0.009",
           size=5,color="black")
p3

#ggsave("pdf/Fig.1c.corr.pH.HKO.2022.12.29.pdf", width = 4, height = 3)
m2<-lm(env.amp1$ko.mgm.H ~ env.amp1$pH +  I(env.amp1$pH^2)  )
AIC(m2)
## [1] -158.4754
summary(m2)
## 
## Call:
## lm(formula = env.amp1$ko.mgm.H ~ env.amp1$pH + I(env.amp1$pH^2))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.055544 -0.016888  0.003081  0.019042  0.055272 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       8.104670   0.117085  69.221   <2e-16 ***
## env.amp1$pH      -0.087095   0.045769  -1.903   0.0658 .  
## I(env.amp1$pH^2)  0.007155   0.004317   1.657   0.1069    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02503 on 33 degrees of freedom
## Multiple R-squared:  0.245,  Adjusted R-squared:  0.1992 
## F-statistic: 5.354 on 2 and 33 DF,  p-value: 0.009689
shapiro.test(residuals(m2))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(m2)
## W = 0.95967, p-value = 0.21
p5<-ggplot() + geom_jitter(data=env.amp1,  height = 0, aes(x= pH, y = rgi.mgm.H, color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.amp1,aes(x= pH, y=rgi.mgm.H), method = "lm", color = "white")+
  labs(x = "Soil pH",y = "H'.ARG")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6.5,y=4.9, label="R2 = 0.190\nP = 0.004",
           size=5,color="black")
p5

summary(lm(env.amp1$rgi.mgm.H~env.amp1$pH))
## 
## Call:
## lm(formula = env.amp1$rgi.mgm.H ~ env.amp1$pH)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.040812 -0.012858  0.000664  0.014349  0.049838 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.917865   0.017772 276.712  < 2e-16 ***
## env.amp1$pH -0.010755   0.003542  -3.037  0.00457 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02168 on 34 degrees of freedom
## Multiple R-squared:  0.2133, Adjusted R-squared:  0.1902 
## F-statistic: 9.221 on 1 and 34 DF,  p-value: 0.004571
shapiro.test(residuals(lm(env.amp1$rgi.mgm.H~env.amp1$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.amp1$rgi.mgm.H ~ env.amp1$pH))
## W = 0.98332, p-value = 0.8506
#ggsave("pdf2/Fig.2b.corr.pH.HARG.2022.12.29.pdf", width = 4, height = 3)

p7<-ggplot() + geom_jitter(data=env.amp1,  height = 0, aes(x= pH, y = cazy.mgm.H, color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.amp1,aes(x= pH, y=cazy.mgm.H), method = "lm", color = "white")+
  labs(x = "Soil pH",y = "H'.Cazy")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6.5,y=3.55, label="R2 = 0.833\nP = 5.52e-15",
           size=5,color="black")
p7

summary(lm(env.amp1$cazy.mgm.H~env.amp1$pH))
## 
## Call:
## lm(formula = env.amp1$cazy.mgm.H ~ env.amp1$pH)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.033579 -0.011907 -0.002831  0.011534  0.057275 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.729231   0.017529  212.74  < 2e-16 ***
## env.amp1$pH -0.046286   0.003493  -13.25 5.52e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02138 on 34 degrees of freedom
## Multiple R-squared:  0.8378, Adjusted R-squared:  0.833 
## F-statistic: 175.6 on 1 and 34 DF,  p-value: 5.52e-15
shapiro.test(residuals(lm(env.amp1$cazy.mgm.H~env.amp1$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.amp1$cazy.mgm.H ~ env.amp1$pH))
## W = 0.97247, p-value = 0.4966
#ggsave("pdf2/Fig.2c.corr.pH.HCazy.2022.12.29.pdf", width = 4, height = 3)

p1.1<-ggplot() + geom_jitter(data=env.amp1,  height = 0, aes(x= round(ko.mgm.H,digits = 2), y=bac.amp.H, color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.amp1,aes(x= ko.mgm.H, y=bac.amp.H), method ="lm", col= "white")+
  labs(x = "H'.KO",y = "H'.16S")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=7.87,y=7.2, label="R2 = 0.448\nP = 4.919e-06",
           size=5,color="black")
p1.1

#ggsave("pdf/Fig.1b.corr.pH.H16S.2023.04.18.pdf", width = 4, height = 3)
summary(lm(env.amp1$bac.amp.S~env.amp1$ko.mgm.S))
## 
## Call:
## lm(formula = env.amp1$bac.amp.S ~ env.amp1$ko.mgm.S)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -791.69 -335.78  -52.81  349.68  876.01 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       19461.2002  2955.9843   6.584 1.52e-07 ***
## env.amp1$ko.mgm.S    -2.0217     0.3731  -5.418 4.92e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 492.1 on 34 degrees of freedom
## Multiple R-squared:  0.4633, Adjusted R-squared:  0.4476 
## F-statistic: 29.36 on 1 and 34 DF,  p-value: 4.919e-06
shapiro.test(residuals(lm(env.amp1$bac.amp.S~env.amp1$ko.mgm.S)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.amp1$bac.amp.S ~ env.amp1$ko.mgm.S))
## W = 0.95219, p-value = 0.1225
p3.1<-ggplot() + geom_jitter(data=env.genomeTraits,  height = 0, aes(x= AGS2, y=round(ko.mgm.H,digits = 2), color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.genomeTraits,aes(x= AGS2, y=ko.mgm.H), method = "lm",col= "white")+
  labs(x = "Average Genome Size (Mbp)",y = "H'.KO")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6,y=7.89, label="R2 = 0.003\nP = 0.721",
           size=5,color="black")

p3.1

#ggsave("pdf2/Fig.2a.corr.pH.HKO.2022.12.29.pdf", width = 4, height = 3)
summary(lm(env.genomeTraits$ko.mgm.H~env.genomeTraits$AGS2))
## 
## Call:
## lm(formula = env.genomeTraits$ko.mgm.H ~ env.genomeTraits$AGS2)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.057311 -0.020494 -0.004491  0.020100  0.050093 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            7.863868   0.019854  396.08   <2e-16 ***
## env.genomeTraits$AGS2 -0.001558   0.004321   -0.36    0.721    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02833 on 34 degrees of freedom
## Multiple R-squared:  0.003806,   Adjusted R-squared:  -0.02549 
## F-statistic: 0.1299 on 1 and 34 DF,  p-value: 0.7208
shapiro.test(residuals(lm(env.genomeTraits$ko.mgm.H~env.genomeTraits$AGS2)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.genomeTraits$ko.mgm.H ~ env.genomeTraits$AGS2))
## W = 0.97055, p-value = 0.4407
p5.1<-ggplot() + geom_jitter(data=env.genomeTraits,  height = 0, aes(x= AGS2, y=round(rgi.mgm.H,digits = 2), color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.genomeTraits,aes(x= AGS2, y=rgi.mgm.H), method = "lm",col= "white")+
  labs(x = "Average Genome Size (Mbp)",y = "H'.ARG")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6,y=4.9, label="R2 = 0.280\nP = 0.0005",
           size=5,color="black")
p5.1

#ggsave("pdf2/Fig.2a.corr.pH.HKO.2022.12.29.pdf", width = 4, height = 3)
summary(lm(env.genomeTraits$rgi.mgm.H~env.genomeTraits$AGS2))
## 
## Call:
## lm(formula = env.genomeTraits$rgi.mgm.H ~ env.genomeTraits$AGS2)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.048815 -0.013871  0.002497  0.014775  0.035015 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           4.811816   0.014322 335.965  < 2e-16 ***
## env.genomeTraits$AGS2 0.011924   0.003117   3.825 0.000533 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02044 on 34 degrees of freedom
## Multiple R-squared:  0.3008, Adjusted R-squared:  0.2803 
## F-statistic: 14.63 on 1 and 34 DF,  p-value: 0.0005334
shapiro.test(residuals(lm(env.genomeTraits$rgi.mgm.H~env.genomeTraits$AGS2)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.genomeTraits$rgi.mgm.H ~ env.genomeTraits$AGS2))
## W = 0.96851, p-value = 0.3862
p7.1<-ggplot() + geom_jitter(data=env.genomeTraits,  height = 0, aes(x= AGS2, y=cazy.mgm.H, color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.genomeTraits,aes(x= AGS2, y=cazy.mgm.H), method = "lm",col= "white")+
  labs(x = "Average Genome Size (Mbp)",y = "H'.Cazy")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6,y=3.6, label="R2 = 0.305\nP = 0.0003",
           size=5,color="black")
p7.1

#ggsave("pdf2/Fig.2a.corr.pH.HKO.2022.12.29.pdf", width = 4, height = 3)
summary(lm(env.genomeTraits$cazy.mgm.H~env.genomeTraits$AGS2))
## 
## Call:
## lm(formula = env.genomeTraits$cazy.mgm.H ~ env.genomeTraits$AGS2)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.080577 -0.029966  0.002461  0.026830  0.081365 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           3.381767   0.030572 110.618  < 2e-16 ***
## env.genomeTraits$AGS2 0.026902   0.006654   4.043 0.000286 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04362 on 34 degrees of freedom
## Multiple R-squared:  0.3247, Adjusted R-squared:  0.3048 
## F-statistic: 16.34 on 1 and 34 DF,  p-value: 0.0002864
shapiro.test(residuals(lm(env.genomeTraits$cazy.mgm.H~env.genomeTraits$AGS2)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.genomeTraits$cazy.mgm.H ~ env.genomeTraits$AGS2))
## W = 0.97741, p-value = 0.6576
#Supplementary Fig. 3
library(linkET)
library(dplyr)

colnames(bac.amp1) == colnames(ko.mgm1)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
bac.amp2<-data.frame(t(bac.amp1))
bac.amp2<-data.frame(apply(bac.amp2,1,function(x){x/sum(x)}))

ko.mgm2<-data.frame(t(ko.mgm1))
ko.mgm2<-data.frame(apply(ko.mgm2,1,function(x){x/sum(x)}))

bac.ko<-cbind(data.frame(t(bac.amp1)),data.frame(t(ko.mgm1)))

env.mantel<-env.genomeTraits[,c("AGS2","ACN8","GC_all","Growthrate",
                                "bac.amp.S","bac.amp.H","ko.mgm.S","ko.mgm.H",
                                "latitude","longitude","altitude","MAP","MAT",
                                "TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N",
                                "C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")]

colnames(env.mantel)<-c("AGS","ACN","GC content","Doubling time",
                        "S.16S","H'.16S","S.KO","H'.KO",
                        "Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")
#plot
mantel <- mantel_test(bac.ko, env.mantel,
                      spec_select = list(BacteriaCom = 1:22579,
                                         FunctionCom = 22580:33644),
                      spec_dist =  dist_func(.FUN = "vegdist", method = "bray"),
                      env_dist = dist_func(.FUN = "vegdist", method = "euclidean")) %>% 
  mutate(rd = cut(r, breaks = c(-Inf, 0.2, 0.4, Inf),
                  labels = c("< 0.2", "0.2 - 0.4", ">= 0.4")),
         pd = cut(p, breaks = c(-Inf, 0.01, 0.05, Inf),
                  labels = c("< 0.01", "0.01 - 0.05", ">= 0.05")))

qcorrplot(correlate(env.mantel,method = "spearman"),type = "lower", diag = FALSE,
          grid_size = 0.25) +
  geom_square() +
  geom_mark(sep = '\n',size=5,sig_level = c(0.05,0.01,0.001),sig_thres = 0.05,
            only_mark = TRUE)+
  geom_couple(aes(colour = pd, size = rd), data = mantel, curvature = 0.1) +
  #scale_fill_gradientn(colours = RColorBrewer::brewer.pal(11, "RdYlBu")) +
  scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
  scale_size_manual(values = c(0.5, 1, 2)) +
  scale_colour_manual(values = c("#f6e8c3", "#c7eae5", "#A2A2A288")) +
  #scale_colour_manual(values = color_pal(3, 0)) +
  guides(size = guide_legend(title = "Mantel's r",
                             override.aes = list(colour = "grey35"), 
                             order = 2),
         colour = guide_legend(title = "Mantel's p", 
                               override.aes = list(size = 3), 
                               order = 1),
         fill = guide_colorbar(title = "Spearman's rho", order = 3))

#Supplementary Fig. 4
p1<-ggplot() + geom_jitter(data=env.amp1,  height = 0, aes(x= pH, y=bac.mgm.H, color = site.latitude), alpha = 0.8, size =5) + 
  #geom_smooth(data=env.amp1,aes(x= pH, y=bac.mgm.H), method ="lm", col= "white")+
  labs(x = "Soil pH",y = "H'.Bacteria.mgm")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6.5,y=6.3, label="R2 = 0.009\nP = 0.253",
           size=5,color="black")
p1

#ggsave("pdf2/Fig.S2A.corr.pH.H16S.2023.04.18.pdf", width = 6, height = 4)
summary(lm(env.amp1$bac.mgm.H~env.amp1$pH))
## 
## Call:
## lm(formula = env.amp1$bac.mgm.H ~ env.amp1$pH)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63980 -0.18019  0.01608  0.20693  0.48888 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.34786    0.23546  26.960   <2e-16 ***
## env.amp1$pH  0.05446    0.04692   1.161    0.254    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2872 on 34 degrees of freedom
## Multiple R-squared:  0.03812,    Adjusted R-squared:  0.009826 
## F-statistic: 1.347 on 1 and 34 DF,  p-value: 0.2538
shapiro.test(residuals(lm(env.amp1$bac.mgm.H~env.amp1$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.amp1$bac.mgm.H ~ env.amp1$pH))
## W = 0.98188, p-value = 0.807
library(lme4)
## 载入需要的程辑包:Matrix
## 
## 载入程辑包:'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(lmerTest)
## 
## 载入程辑包:'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(sjPlot)
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
lm<-lmer(bac.amp.H~1+pH+(1|latitude),data = env.amp1,
         REML = FALSE,control=lmerControl(optimizer="bobyqa"))

shapiro.test(residuals(lm))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm)
## W = 0.97453, p-value = 0.5614
summary(lm)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: bac.amp.H ~ 1 + pH + (1 | latitude)
##    Data: env.amp1
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##     -0.6      5.8      4.3     -8.6       32 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8410 -0.6192 -0.1238  0.4461  1.9018 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  latitude (Intercept) 0.009959 0.09979 
##  Residual             0.038044 0.19505 
## Number of obs: 36, groups:  latitude, 12
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  5.57559    0.21061 13.64329   26.47 4.01e-13 ***
## pH           0.21013    0.04194 13.72478    5.01 0.000203 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##    (Intr)
## pH -0.979
tab_model(lm)
  bac.amp.H
Predictors Estimates CI p
(Intercept) 5.58 5.15 – 6.00 <0.001
pH 0.21 0.12 – 0.30 <0.001
Random Effects
σ2 0.04
τ00 latitude 0.01
ICC 0.21
N latitude 12
Observations 36
Marginal R2 / Conditional R2 0.496 / 0.601
p2<-ggplot() + geom_jitter(data=env.amp1,  height = 0, aes(x= pH, y=bac.mgm.S, color = site.latitude), alpha = 0.8, size =5) + 
  #geom_smooth(data=env.amp1,aes(x= pH, y=bac.mgm.S), method ="loess", col= "white")+
  labs(x = "Soil pH",y = "S.Bacteria.mgm")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6.5,y=33500, label="R2 = 0.0184\nP = 0.429",
           size=5,color="black")
p2

#ggsave("pdf2/Fig.S2B.corr.pH.H16S.2023.04.18.pdf", width = 6, height = 4)
summary(lm(env.amp1$bac.mgm.S~env.amp1$pH))
## 
## Call:
## lm(formula = env.amp1$bac.mgm.S ~ env.amp1$pH)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -827.16 -280.83    0.17  155.35  958.18 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 33869.01     322.51   105.0   <2e-16 ***
## env.amp1$pH    51.42      64.27     0.8    0.429    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 393.3 on 34 degrees of freedom
## Multiple R-squared:  0.01848,    Adjusted R-squared:  -0.01039 
## F-statistic: 0.6402 on 1 and 34 DF,  p-value: 0.4292
shapiro.test(residuals(lm(env.amp1$bac.mgm.S~env.amp1$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.amp1$bac.mgm.S ~ env.amp1$pH))
## W = 0.96964, p-value = 0.4158
#Supplementary Fig. 5
env.amp1$sample_name==env.genomeTraits$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                     "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                     "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(env.amp1[,c("bac.mgm.H","bac.mgm.S","bac.amp.H","bac.amp.S", "ko.mgm.H","ko.mgm.S","rgi.mgm.H","rgi.mgm.S", "cazy.mgm.H","cazy.mgm.S")])

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(r)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(cor.out$r, 2)


cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))

cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

cor.out$Y <- factor(cor.out$Y, levels = c("bac.mgm.H","bac.mgm.S","bac.amp.H","bac.amp.S", "ko.mgm.H","ko.mgm.S","rgi.mgm.H","rgi.mgm.S", "cazy.mgm.H","cazy.mgm.S"))

cor.out$Y <- factor(cor.out$Y, labels = c("H'.bacteria.mgm","S.bacteria.mgm","H'.16S","S.16S","H'.KO","S.KO","H'.ARG","S.ARG","H'.Cazy","S.Cazy"))

ggplot(cor.out, aes(Y,X)) + 
  geom_tile(aes(fill = r), size=1)+
  geom_text(aes(label = r), size = 1.5)+
  scale_fill_gradient(guide = "legend", high='green', low='blue')+
  theme(axis.title= element_blank(),
        axis.text.x=element_text(colour="black", size=8, face="bold",angle = 20),
        axis.text.y=element_text(colour="black", size=8, face="bold"))

cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0

p4<-ggplot(cor.out.sig, aes(Y,X)) + 
  geom_tile(aes(fill = r), size=1)+
  geom_text(data=  cor.out.sig[cor.out.sig$r!=0,], aes(label = r), size = 2.5, font="bold")+
  scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
   geom_hline(yintercept = c(8.5,9.5), color = "red")+
   theme(axis.title= element_blank(),
        axis.text.x=element_text(colour="black", size=12, face="bold",angle = 90, hjust=1,vjust = 0.5),
        axis.text.y=element_text(colour="black", size=12, face="bold"))
p4

#Supplementary Fig. 6a
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(cog.mgm1))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:480] 0.2 0.15 0.03 -0.43 -0.12 -0.08 -0.01 0.24 0.58 0.49 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

library(splitstackshape)

cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
cor.out.sig$Y_2<-"COGs"

ggplot(cor.out.sig, aes(Y,X)) +
  facet_grid(.~ Y_2,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  geom_text(data=  cor.out.sig[cor.out.sig$r!=0,], aes(label = r), size = 2, font="bold")+
  geom_hline(yintercept = c(8.5,9.5), color = "red")+
  scale_fill_gradient(guide = "legend", high='green', low='blue')+
  scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
  theme(axis.title= element_blank(),
        strip.text = element_text(colour="black",size = 12),
        axis.text.x=element_text(colour="black", size=12, face="bold"),
        axis.text.y=element_text(colour="black", size=12, face="bold"))

#ggsave("pdf2/Fig.S3A.corrHeatmap.env.cog.genes.2023.01.05.pdf", width = 10, height = 4)
#Supplementary Fig. 6b
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.mgm1))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:221300] -0.06 -0.01 0.25 0.13 -0.03 -0.19 -0.28 -0.33 -0.31 -0.2 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))

cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

library(splitstackshape)

cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
cor.out.sig$Y_2<-"All annotated KOs"
  
ggplot(cor.out.sig, aes(Y,X)) +
  facet_grid(.~ Y_2,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  geom_hline(yintercept = c(8.5,9.5), color = "red")+
  scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
  theme(strip.text = element_text(size = 12,face="bold", color = c("black")),
        #strip.background = element_rect(fill = "grey"),
        panel.spacing = unit(0.1, "lines"),
        axis.title= element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y=element_text(colour="black", size=15, face="bold"))

#ggsave("pdf2/Fig.S3B.corrHeatmap.env.ko.genes.2023.01.05.pdf", width = 20, height = 5)
#Supplementary Fig. 7
#func <- c("Ribosome","Aminoacyl.tRNA.biosynthesis")
#nam <- "Translation"
#hi <- 15
Heatmap.genes <- function (func, nam, hi){
  ko.ID<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "KEGG3")
  ko.ID.sub <- data.frame(ko.ID[ko.ID$Level3 %in%  func,])
  
  ko.mgm.sub <- ko.mgm1[row.names(ko.mgm1) %in% ko.ID.sub$KO,]
  ko.ID.sub <- ko.ID.sub[ko.ID.sub$KO %in% row.names(ko.mgm1) ,]
  ko.ID.sub <- ko.ID.sub[order(ko.ID.sub$KO),]
  row.names(ko.mgm.sub) == ko.ID.sub$KO
  ko.ID.sub$KO.id <- paste0(ko.ID.sub$Level3, "_",ko.ID.sub$KO)
  row.names(ko.mgm.sub) <- ko.ID.sub$KO.id
  
  ko.mgm.sub <- ko.mgm.sub[order(ko.ID.sub$No),]
  env.raw1<-env.amp1
  rownames(env.raw1)<-env.raw1$sample_name
  env.sel<-env.raw1[row.names(env.raw1) %in% colnames(ko.mgm.sub),]
  function.sel<-data.frame(t(ko.mgm.sub))
  row.names(env.sel) == row.names(function.sel)
  
  
  env.sel[, paste0(func, ".RA")] <- rowSums(function.sel)
  env.sel[, paste0(func, ".H")] <- vegan::diversity(function.sel)
  env.sel[, paste0(func, ".S")] <- vegan::specnumber(function.sel)
  cog.mgm <- data.frame(t(cog.mgm1))
  row.names(cog.mgm) == row.names(env.sel)
  env.sel$cog.mgm.sum <- rowSums(cog.mgm)
  env.cog <- data.frame(env.sel,cog.mgm )
  env.cog$rib.pct <- env.cog$J/env.cog$cog.mgm.sum
  env.cog$transcript.pct <- env.cog$K/env.cog$cog.mgm.sum
  env.cog$defense.pct <- env.cog$V/env.cog$cog.mgm.sum
  
  env.tmp1<-env.cog[,c("latitude","longitude","altitude","MAP","MAT",
                       "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                       "AllP_abu","AllP_bsa","AllP_rich")]
  
  env.tmp1<-data.frame(apply(env.tmp1,2,as.numeric))
  env.tmp2 <- function.sel
  
  spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
  
  
  r<-data.frame(spman.d12$r)
  r[is.na(r)] <-0
  r$X<-row.names(r)
  
  r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
  p<-data.frame(spman.d12$p)
  p$X<-row.names(p)
  p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
  cor.out<-cbind(r.long,p.long$p)
  cor.out$r <- round(as.numeric(cor.out$r), 2)
  str(cor.out$r)
  
  cor.out$X<- factor(cor.out$X, 
                     levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                                "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
  
  cor.out$X<- factor(cor.out$X, 
                     labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                                "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                                "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                                "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
  library(splitstackshape)
  #cor.out$Y 
  cor.out<-cSplit(cor.out, "Y", "_")
  cor.out$Y_1 <- factor(cor.out$Y_1, levels = func)
  
  p0<-ggplot(cor.out, aes(Y_2,X)) +
    facet_grid(cols = vars(Y_1), scales = "free", space = "free")+
    geom_tile(aes(fill = r), size=1)+
    geom_hline(yintercept = c(8.5,9.5), color = "red")+
    scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
    theme(axis.title= element_blank(),
          axis.text.x=element_text(colour="black", size=8, face="bold",angle = 90),
          axis.text.y=element_text(colour="black", size=12, face="bold"))
  #print(p0)
  
  cor.out.sig <-cor.out
  cor.out.sig$r[cor.out.sig$p > 0.05] <- 0
  
  p1<-ggplot(cor.out.sig, aes(Y_2,X)) + 
    facet_grid(cols = vars(Y_1), scales = "free", space = "free")+
    geom_tile(aes(fill = r), size=1)+
    geom_text(data=  cor.out.sig[cor.out.sig$r!=0,], aes(label = r), size = 1.5, font="bold")+
    geom_hline(yintercept = c(8.5,9.5), color = "red")+
    scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
    theme_bw()+
    theme(axis.title= element_blank(),
          strip.text = element_text(size = 12,face="bold", color = "white"),
          strip.background = element_rect(fill = "blue"),
          panel.spacing = unit(0, "lines"),
          axis.text.x=element_text(colour="black", size=8, face="bold",angle = 90,vjust = 0.5),
          axis.text.y=element_text(colour="black", size=12,face="bold"))
  
  #ggsave(paste0("pdf2/Fig.S4.Heatmap2.",nam,".2023.01.05.pdf"), width = hi, height = 6)
  print(p1)

}

Heatmap.genes(  c("Ribosome","Aminoacyl.tRNA.biosynthesis"), "Translation", 20  )
##  num [1:3280] 0.43 0.28 0.12 -0.47 -0.41 0.33 0.28 0.52 0.52 0.71 ...

#Supplementary Fig. 8
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)

ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.sel))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))

cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

library(splitstackshape)

cor.out<-cSplit(cor.out, "Y", "_")

cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
                                              "Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))

cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
                                             "Energy metabolism","Membrane transport","CC","GDM","MAC"))


cor.out.sub<-cor.out[cor.out$Y_4=="GBM",]
levels(droplevels(factor(cor.out.sub$Y_5)))
##  [1] "Arabinogalactan.biosynthesis.Mycobacterium"                         
##  [2] "Exopolysaccharide.biosynthesis"                                     
##  [3] "Glycosaminoglycan.biosynthesis.chondroitin.sulfate.dermatan.sulfate"
##  [4] "Glycosaminoglycan.biosynthesis.heparan.sulfate.heparin"             
##  [5] "Glycosaminoglycan.degradation"                                      
##  [6] "Glycosphingolipid.biosynthesis.ganglio.series"                      
##  [7] "Glycosphingolipid.biosynthesis.globo.and.isoglobo.series"           
##  [8] "Glycosphingolipid.biosynthesis.lacto.and.neolacto.series"           
##  [9] "Glycosylphosphatidylinositol.anchor.biosynthesis"                   
## [10] "Lipoarabinomannan.biosynthesis"                                     
## [11] "Lipopolysaccharide.biosynthesis"                                    
## [12] "Mannose.type.O.glycan.biosynthesis"                                 
## [13] "N.Glycan.biosynthesis"                                              
## [14] "O.Antigen.nucleotide.sugar.biosynthesis"                            
## [15] "O.Antigen.repeat.unit.biosynthesis"                                 
## [16] "Other.glycan.degradation"                                           
## [17] "Other.types.of.O.glycan.biosynthesis"                               
## [18] "Peptidoglycan.biosynthesis"                                         
## [19] "Teichoic.acid.biosynthesis"                                         
## [20] "Various.types.of.N.glycan.biosynthesis"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, levels = c( "Arabinogalactan.biosynthesis.Mycobacterium", "Exopolysaccharide.biosynthesis", "Glycosaminoglycan.biosynthesis.chondroitin.sulfate.dermatan.sulfate", 
                                                       "Glycosaminoglycan.biosynthesis.heparan.sulfate.heparin", "Glycosaminoglycan.degradation", "Glycosphingolipid.biosynthesis.ganglio.series", 
                                                       "Glycosphingolipid.biosynthesis.globo.and.isoglobo.series", "Glycosphingolipid.biosynthesis.lacto.and.neolacto.series", "Glycosylphosphatidylinositol.anchor.biosynthesis", 
                                                       "Lipoarabinomannan.biosynthesis", "Lipopolysaccharide.biosynthesis", "O.Antigen.nucleotide.sugar.biosynthesis",  
                                                       "O.Antigen.repeat.unit.biosynthesis", "Mannose.type.O.glycan.biosynthesis", 
                                                       "N.Glycan.biosynthesis","Peptidoglycan.biosynthesis","Other.glycan.degradation", "Other.types.of.O.glycan.biosynthesis", "Various.types.of.N.glycan.biosynthesis", 
                                                        "Teichoic.acid.biosynthesis"))

cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c( "ABM", "Exopolysaccharide.biosynthesis", "C", 
                                                       "H", "GD", "S", 
                                                       "GI", "L", "A", 
                                                       "Lipoarabinomannan", "Lipopolysaccharide.biosynthesis", "O.Antigen.nucleotide.sugar.biosynthesis", "ARUB","M.G", 
                                                       "N.Glycan", "Peptidoglycan.biosynthesis", 
                                                       "Other.glycan.D", "O", "VN.glycanB", 
                                                       "Teichoic.acid.biosyn"))

p1<-ggplot(cor.out.sub, aes(Y_1,X)) +
  facet_grid(.~ Y_5,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  geom_hline(yintercept = c(8.5,9.5), color = "red")+
  scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
  theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
        strip.background = element_rect(fill = "red"),
        panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y=element_text(colour="black", size=18, face="bold")) +
  labs(caption="ABM: Arabinogalactan.biosynthesis.Mycobacterium;C: Glycosaminoglycan.biosynthesis.chondroitin.sulfate.dermatan.sulfate; H: Glycosaminoglycan.biosynthesis.heparan.sulfate.heparin; GD: Glycosaminoglycan.degradation; S: Glycosphingolipid.biosynthesis.ganglio.series; GI: Glycosphingolipid.biosynthesis.globo.and.isoglobo.series; L: Glycosphingolipid.biosynthesis.lacto.and.neolacto.series; 
       A: Glycosylphosphatidylinositol.anchor.biosynthesis; Lipoarabinomannan: Lipoarabinomannan.biosynthesis;ARUB: O.Antigen.repeat.unit.biosynthesis; M.G: Mannose.type.O.glycan.biosynthesis; N.Glycan: N.Glycan.biosynthesis; Other.glycan.D: Other.glycan.degradation; O: Other.types.of.O.glycan.biosynthesis; VN.glycanB: Various.types.of.N.glycan.biosynthesis,Teichoic.acid.biosyn: Teichoic.acid.biosynthesis" )
p1

#ggsave("pdf2/Fig.S6a.corrHeatmap.env.Glycan.2023.01.05.pdf", width = 30, height = 7)

env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(cazy.mgm1))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)

r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:2780] 0.24 0.18 -0.22 -0.48 -0.11 0.09 0.12 0.35 0.51 0.61 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))

cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

library(splitstackshape)

cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
cor.out.sig$Y_2<-"Carbohydrate-active enzymes (CAZy)"

p2<-ggplot(cor.out.sig, aes(Y,X)) +
  facet_grid(.~ Y_2,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  geom_text(data=  cor.out.sig[cor.out.sig$r!=0,], aes(label = r), size = 1.5, font="bold")+
  geom_hline(yintercept = c(8.5,9.5), color = "red")+
  scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
  theme(strip.text = element_text(size = 12,face="bold", color = c("white")),
        strip.background = element_rect(fill = "red"),
        axis.title= element_blank(),
        axis.text.x=element_text(colour="black", size=12, face="bold",angle = 90, hjust=1,vjust = 0.5),
        axis.text.y=element_text(colour="black", size=18, face="bold"))
p2

#ggsave("pdf2/Fig.S6b.corrHeatmap.env.cazy.genes.2023.01.05.pdf", width = 20, height = 6)
#Supplementary Fig. 9
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)


ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.sel))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))

cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

library(splitstackshape)

cor.out<-cSplit(cor.out, "Y", "_")

cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
                                              "Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))

cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
                                             "Energy metabolism","Membrane transport","CC","GDM","MAC"))


cor.out.sub<-cor.out[cor.out$Y_4=="CM",]

levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Bacterial.chemotaxis"             "Flagellar.assembly"              
## [3] "Regulation.of.actin.cytoskeleton"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c("Bacterial chemotaxis", "Flagellar assembly","RAC"))

p1<- ggplot(cor.out.sub, aes(Y_1,X)) +
  facet_grid(.~ Y_5,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  geom_hline(yintercept = c(8.5,9.5), color = "red")+
  scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
  theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
        strip.background = element_rect(fill = "red"),
        panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
        axis.text.x=element_text(colour="black", size=10, face="bold",angle = 90, hjust=1,vjust = 0.5),
        axis.text.y=element_text(colour="black", size=15, face="bold")) +
  labs(caption="RAC: Regulation of actin cytoskeleton")

p1

#ggsave("pdf2/Fig.S7a.Heatmap.KEGG.cellular.motility.2023.01.05.pdf", width = 20, height = 6)

ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)

ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.sel))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

library(splitstackshape)

cor.out<-cSplit(cor.out, "Y", "_")

cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
                                              "Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))

cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
                                             "Energy metabolism","Membrane transport","CC","GDM","MAC"))


cor.out.sub<-cor.out[cor.out$Y_4=="Two component system",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Two.component.system"
#cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c("Bacterial chemotaxis", "Flagellar assembly","RAC"))

p2<-ggplot(cor.out.sub, aes(Y_1,X)) +
 facet_grid(.~ Y_5,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  geom_hline(yintercept = c(8.5,9.5), color = "red")+
  scale_fill_gradient(guide = "legend", high='green', low='blue')+
  theme(strip.text = element_text(size = 12,face="bold",color = c("white")),
        strip.background = element_rect(fill = "red"),
        panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
        axis.text.x=element_text(colour="black", size=4, face="bold",angle = 90, hjust=1,vjust = 0.5),
        axis.text.y=element_text(colour="black", size=15, face="bold")) 
p2

#ggsave("pdf2/Fig.S7b.corrHeatmap.env.Signal.transduction.2023.01.05.pdf", width = 20, height = 6)

ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)



ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.sel))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:83460] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)

cor.out<-cSplit(cor.out, "Y", "_")

cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility", "Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Cellular.community.prokaryotes","Pentose.and.glucuronate.interconversions",
                                              "Translation",   "Energy.metabolism",  "Membrane.transport" , "Folding.sorting.and.degradation",
                                              "Metabolism.of.cofactors.and.vitamins"  ))

cor.out$Y_4 <- factor(cor.out$Y_4, labels  = c("BSS","CM", "XenoBDM","Signal transduction","Cellular community","PGI",
                                               "Translation",   "Energy metabolism",  "Membrane transport" , "FSD",
                                               
                                               "Metabolism of cofactors and vitamins"  ))

cor.out.sub<-cor.out[cor.out$Y_4=="Cellular community",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Biofilm.formation" "Quorum.sensing"
p3<-ggplot(cor.out.sub, aes(Y_1,X)) +
  facet_grid(.~ Y_5,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  geom_hline(yintercept = c(8.5,9.5), color = "red")+
  scale_fill_gradient(guide = "legend", high='green', low='blue')+
  theme(strip.text = element_text(size = 12,face="bold", color = c("white")),
        strip.background = element_rect(fill = "red"),
        panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
        axis.text.x=element_text(colour="black", size=4, face="bold",angle = 90, hjust=1),
        axis.text.y=element_text(colour="black", size=15, face="bold"))
p3

#ggsave("pdf2/Fig.S7c.corrHeatmap.env.Cellular.community.2023.01.05.pdf", width = 20, height = 6)
#Supplementary Fig. 10
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)

ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.sel))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

library(splitstackshape)

cor.out<-cSplit(cor.out, "Y", "_")

cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
                                              "Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))

cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
                                             "Energy metabolism","Membrane transport","CC","GDM","MAC"))

cor.out.sub<-cor.out[cor.out$Y_4=="MTP",]
levels(droplevels(factor(cor.out.sub$Y_5)))
##  [1] "Biosynthesis.of.12.14.and.16.membered.macrolides"            
##  [2] "Biosynthesis.of.ansamycins."                                 
##  [3] "Biosynthesis.of.enediyne.antibiotics."                       
##  [4] "Biosynthesis.of.siderophore.group.nonribosomal.peptides."    
##  [5] "Biosynthesis.of.type.II.polyketide.backbone."                
##  [6] "Biosynthesis.of.type.II.polyketide.products."                
##  [7] "Biosynthesis.of.vancomycin.group.antibiotics."               
##  [8] "Carotenoid.biosynthesis."                                    
##  [9] "Diterpenoid.biosynthesis.Including.Gibberellin.biosynthesis."
## [10] "Geraniol.degradation."                                       
## [11] "Insect.hormone.biosynthesis."                                
## [12] "Limonene.and.pinene.degradation."                            
## [13] "Monoterpenoid.biosynthesis."                                 
## [14] "Nonribosomal.peptide.structures."                            
## [15] "Polyketide.sugar.unit.biosynthesis."                         
## [16] "Sesquiterpenoid.and.triterpenoid.biosynthesis."              
## [17] "Terpenoid.backbone.biosynthesis."                            
## [18] "Tetracycline.biosynthesis."                                  
## [19] "Type.I.polyketide.structures."                               
## [20] "Zeatin.biosynthesis."
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, levels = c( "Biosynthesis.of.12.14.and.16.membered.macrolides", "Biosynthesis.of.ansamycins.", "Biosynthesis.of.enediyne.antibiotics.", 
                                                       "Biosynthesis.of.siderophore.group.nonribosomal.peptides.", "Biosynthesis.of.type.II.polyketide.backbone.", "Biosynthesis.of.type.II.polyketide.products.", 
                                                       "Biosynthesis.of.vancomycin.group.antibiotics.", "Carotenoid.biosynthesis.", "Diterpenoid.biosynthesis.Including.Gibberellin.biosynthesis.", 
                                                       "Geraniol.degradation.", "Insect.hormone.biosynthesis.", "Limonene.and.pinene.degradation.",  
                                                       "Monoterpenoid.biosynthesis.", "Nonribosomal.peptide.structures.", 
                                                       "Polyketide.sugar.unit.biosynthesis.","Sesquiterpenoid.and.triterpenoid.biosynthesis.","Terpenoid.backbone.biosynthesis.", "Tetracycline.biosynthesis.", "Type.I.polyketide.structures.", 
                                                        "Zeatin.biosynthesis."))

cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c( "BMM", "Biosynthesis.of.ansamycins", "Biosynthesis.of.enediyne.antibiotics", 
                                                       "Biosynthesis.of.siderophore", "BPB", "Biosyn.polyketide", 
                                                       "Biosyn.vancomycin", "Carotenoid.biosynthesis", "D","Geraniol.D", 
                                                       "IH", "LPD", "M", "Nonribosomal.P","Polyketide.sugar.unit.biosynthesis", 
                                                       "STB", "Terpenoid.backbone.biosynthesis", 
                                                       "TB", "Type.I.polyketide.structures", "ZB"))

p1<-ggplot(cor.out.sub, aes(Y_1,X)) +
  facet_grid(.~ Y_5,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  geom_hline(yintercept = c(8.5,9.5), color = "red")+
  scale_fill_gradient(guide = "legend", high='green', low='blue')+
  theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
        strip.background = element_rect(fill = "red"),
        panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y=element_text(colour="black", size=18, face="bold")) +
  labs(caption="BBM: Biosynthesis.of.12.14.and.16.membered.macrolides;Biosynthesis.of.siderophore: Biosynthesis.of.siderophore.group.nonribosomal.peptides; BPB: Biosynthesis.of.type.II.polyketide.backbone; Biosyn.polyketide: Biosynthesis.of.type.II.polyketide.products; Biosyn.vancomycin: Biosynthesis.of.vancomycin.group.antibiotics; 
  D: Diterpenoid.biosynthesis.Including.Gibberellin.biosynthesis; Geraniol.D: Geraniol.degradation; IH: Insect.hormone.biosynthesis; LPD: Limonene.and.pinene.degradation; M: Monoterpenoid.biosynthesis; Nonribosomal.P: Nonribosomal.peptide.structures; TB: Terpenoid.backbone.biosynthesis; ZB: Zeatin.biosynthesis" )
p1

#ggsave("pdf2/Fig.S8a.corrHeatmap.env.Metabolism.Terpenoid.2023.01.05.pdf", width = 30, height = 7)

ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)



ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.sel))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)

cor.out<-cSplit(cor.out, "Y", "_")

cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
                                              "Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))

cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
                                             "Energy metabolism","Membrane transport","CC","GDM","MAC"))

cor.out.sub<-cor.out[cor.out$Y_4=="XenoBDM",]
levels(droplevels(factor(cor.out.sub$Y_5)))
##  [1] "Aminobenzoate.degradation"                      
##  [2] "Atrazine.degradation"                           
##  [3] "Benzoate.degradation"                           
##  [4] "Bisphenol.degradation"                          
##  [5] "Caprolactam.degradation"                        
##  [6] "Chloroalkane.and.chloroalkene.degradation"      
##  [7] "Chlorocyclohexane.and.chlorobenzene.degradation"
##  [8] "Dioxin.degradation"                             
##  [9] "Drug.metabolism.cytochrome.P450"                
## [10] "Drug.metabolism.other.enzymes"                  
## [11] "Ethylbenzene.degradation"                       
## [12] "Fluorobenzoate.degradation"                     
## [13] "Furfural.degradation"                           
## [14] "Metabolism.of.xenobiotics.by.cytochrome.P450"   
## [15] "Naphthalene.degradation"                        
## [16] "Nitrotoluene.degradation"                       
## [17] "Polycyclic.aromatic.hydrocarbon.degradation"    
## [18] "Steroid.degradation"                            
## [19] "Styrene.degradation"                            
## [20] "Toluene.degradation"                            
## [21] "Xylene.degradation"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c( "Aminobenzoate.degradation", "Atrazine.degradation", "Benzoate.degradation", 
                                                       "Bisphenol.degradation", "Caprolactam.degradation", "Chloroalkane.and.chloroalkene.degradation", 
                                                       "Chlorocyclohexane.and.chlorobenzene.degradation", "Dioxin.degradation", "Drug.metabolism.cytochrome.P450", 
                                                       "Drug.metabolism.other.enzymes", "Ethylbenzene.degradation", "Fluorobenzoate.degradation", 
                                                       "Furfural.degradation", "Metabolism.of.xenobiotics.by.cytochrome.P450",  
                                                       "Naphthalene.degradation", "Nitrotoluene.degradation", "Polycyclic.aromatic.hydrocarbon.degradation", 
                                                       "Steroid.degradation", "Styrene.degradation", "Toluene.degradation", 
                                                       "Xylene.degradation"))

cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c( "Aminobenzoate.degradation", "AtrD", "Benzoate.degradation", 
                                                       "B", "CapD", "ChloroalkaneD", 
                                                       "ChlcbD", "DioxinD", "Dc", 
                                                       "DrugO", "EbD", "FbD", 
                                                       "FfD", "Mxc", "NaD", 
                                                       "NiD", "PcahD", "SteroidD", 
                                                       "StyreneD", "TolueneD", "XyleneD"))

p2<-ggplot(cor.out.sub, aes(Y_1,X)) +
  facet_grid(.~ Y_5,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  geom_hline(yintercept = c(8.5,9.5), color = "red")+
  scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
  theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
        strip.background = element_rect(fill = "red"),
        panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y=element_text(colour="black", size=18, face="bold")) +
  labs(caption="AtrD: Atrazine.degradation; B: Bisphenol.degradation; CapD: Caprolactam.degradation; ChlcbD: Chloroalkane.and.chloroalkene.degradation; ChlorocyclohexaneD: Chlorocyclohexane.and.chlorobenzene.degradation; DioxinD: Dioxin.degradation; Dc: Drug.metabolism.cytochrome.P450.; DrugO: Drug.metabolism.other.enzymes; EbD: Ethylbenzene.degradation; FbD: Fluorobenzoate.degradation; FfD: Furfural.degradation;
       Mxc: Metabolism.of.xenobiotics.by.cytochrome; NaD: Naphthalene.degradation; NiD: Nitrotoluene.degradation; PcahD: Polycyclic.aromatic.hydrocarbon.degradation; SD: Steroid.degradation; StyreneD: Styrene.degradation; TolueneD: Toluene.degradation; XyleneD: Xylene.degradation" )

p2

#ggsave("pdf2/Fig.S8b.corrHeatmap.env.XenoBDM.2023.01.05.pdf", width = 30, height = 7)

env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(rgi.mgm1))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:21120] -0.38 -0.28 -0.28 0.17 0.48 -0.23 -0.1 -0.19 -0.32 -0.35 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)

cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
cor.out.sig$Y_2<-"Antibiotic resistance genes (ARG)"

p3<-ggplot(cor.out.sig, aes(Y,X)) +
  facet_grid(.~ Y_2,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  geom_hline(yintercept = c(8.5,9.5), color = "red")+
  scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
  theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
        strip.background = element_rect(fill = "red"),
        panel.spacing = unit(0.1, "lines"),
        axis.title= element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y=element_text(colour="black", size=18, face="bold"))
p3

#ggsave("pdf2/Fig.S8c.corrHeatmap.env.ARG.genes.2023.01.09.pdf", width = 30, height = 7)
#Supplementary Fig. 11
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)

ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.sel))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)

cor.out<-cSplit(cor.out, "Y", "_")

cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
                                              "Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))

cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
                                             "Energy metabolism","Membrane transport","CC","GDM","MAC"))

cor.out.sub<-cor.out[cor.out$Y_4=="Energy metabolism",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Carbon.fixation.pathways.in.prokaryotes"
## [2] "Methane.metabolism"                     
## [3] "Nitrogen.metabolism"                    
## [4] "Oxidative.phosphorylation"              
## [5] "Sulfur.metabolism"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c("Carbon.fixation", "Methane.metabolism",  "Nitrogen.M",  "Oxidative.phosphorylation",   "Sulfur.metabolism" ))

p1<-ggplot(cor.out.sub, aes(Y_1,X)) +
  facet_grid(.~ Y_5,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  geom_hline(yintercept = c(8.5,9.5), color = "red")+
  scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
  theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
        strip.background = element_rect(fill = "blue"),
        panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y=element_text(colour="black", size=8, face="bold")) +
  labs(caption="Nitrogen.M: Nitrogen metabolism")
p1

#ggsave("pdf2/Fig.S9a.corrHeatmap.env.Energy.metabolism.2023.01.05.pdf", width = 10, height = 3)

ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)

ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.sel))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))

cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

library(splitstackshape)

cor.out<-cSplit(cor.out, "Y", "_")

cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
                                              "Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))

cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
                                             "Energy metabolism","Membrane transport","CC","GDM","MAC"))

cor.out.sub<-cor.out[cor.out$Y_4=="Membrane transport",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "ABC.transporters"          "Phosphotransferase.system"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c("ABC.transporters" , "PS" ))

p2<-ggplot(cor.out.sub, aes(Y_1,X)) +
  facet_grid(.~ Y_5,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  geom_hline(yintercept = c(8.5,9.5), color = "red")+
  scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
  theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
        strip.background = element_rect(fill = "blue"),
        panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y=element_text(colour="black", size=8, face="bold")) +
  labs(caption="PS: Phosphotransferase.system")
p2

#ggsave("pdf2/Fig.S9b.corrHeatmap.env.Membrane.transport.2023.01.05.pdf", width = 10, height = 3)

R Markdown